#Chapter 3, Transformations

# GE 1: Collapsing a numeric variable into substantively useful categories.
# Weighted summary of raw age, to help choose the cut points.
describe(nes$age, weights = nes$wt)
# cut2() treats each cut point as the inclusive lower bound of a group,
# so a cut at 31 starts the second group at age 31.
nes$age5 <- cut2(nes$age, c(31, 41, 51, 61, 94))
# Labels are assigned to the cut2() levels by position, lowest group first.
levels(nes$age5) <- c("18-30", "31-40", "41-50", "51-60", "61-")
levels(nes$age5)
nes$age5 <- as.ordered(nes$age5)
# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned, which would silently turn the plot back on.
freq(nes$age5, nes$wt, plot = FALSE)

#Box or endnote?: R orders factor levels by sorting numbers first, then letters.
#That works out okay above, but not always. How to reorder factor levels? RIAp31

# GE 2: Collapsing a numeric variable into roughly equal-sized groups
# (quantiles).
freq(nes$income.r, nes$wt)
# Numeric copy of the income variable for cut2().
# NOTE(review): presumably income.r is a factor, in which case as.numeric()
# returns its integer level codes rather than dollar amounts -- confirm.
nes$income.rn <- as.numeric(nes$income.r)
# g = 4 requests four quantile groups.
nes$income4.r <- cut2(nes$income.rn, g = 4)
# Look at the raw interval labels before renaming them.
freq(nes$income4.r, nes$wt)
# Rename the groups by position (lowest first), then mark them as ordered.
levels(nes$income4.r) <- c("Low", "MedLow", "MedHigh", "High")
nes$income4.r <- as.ordered(nes$income4.r)
freq(nes$income4.r, nes$wt)

# GE 3: Creating an indicator (0/1) variable.
freq(nes$marital, nes$wt)
# Check how marital is stored before comparing it against a level label.
class(nes$marital)
# The comparison yields TRUE/FALSE (NA stays NA); as.numeric() turns that
# into 1/0. Without as.numeric() R keeps FALSE and TRUE -- no worries, the
# weighted mean below would work either way.
nes$married <- as.numeric(nes$marital == "1. Married")
# wtd.mean(nes$married, nes$wt)
freq(nes$married, nes$wt)
# Labelled factor version. Naming levels and labels explicitly ties "No" to 0
# and "Yes" to 1; relabelling by position would attach "No" to 1 if no 0s
# were present in the data.
nes$married.f <- factor(nes$married, levels = c(0, 1), labels = c("No", "Yes"))
freq(nes$married.f, nes$wt)

# GE 4: Additive index or scale.
# Weighted frequency tables (no plots) for the four component items.
freq(nes$goal.democ,       nes$wt, plot = FALSE)
freq(nes$goal.humanrights, nes$wt, plot = FALSE)
freq(nes$goal.hunger,      nes$wt, plot = FALSE)
freq(nes$goal.UN,          nes$wt, plot = FALSE)
# Sum the four items into a single index.
nes$goals.intl <- nes$goal.democ + nes$goal.humanrights +
  nes$goal.hunger + nes$goal.UN
# Introduce the with() function: the same sum without repeating `nes$`.
nes$goals.intl <- with(nes, goal.democ + goal.humanrights +
                         goal.hunger + goal.UN)

freq(nes$goals.intl, nes$wt, plot = FALSE)
# Now collapse 4 into 3: cuts at 1, 2 and 3 start a new group at each value,
# and the last cut (5) leaves everything from 3 up in the top group.
# NOTE(review): assumes the index runs 0-4 (four 0/1 items) -- confirm.
# The numeric index is overwritten by its factor version here, so re-create
# the sum above before re-running this line.
nes$goals.intl <- cut2(nes$goals.intl, c(1, 2, 3, 5))
# Same four labels as the income quartiles above.
levels(nes$goals.intl) <- c("Low", "MedLow", "MedHigh", "High")
# label(nes$goals.intl) <- "Internationalism Scale"
nes$goals.intl <- as.ordered(nes$goals.intl)
freq(nes$goals.intl, nes$wt)

# Exercise 1: polviews --> polview3
# gss$polview3 <- NULL   # uncomment to discard an earlier attempt
freq(gss$polviews, gss$wtss, plot = FALSE)
class(gss$polviews)
# Numeric copy of polviews so it can be cut into groups.
gss$polviews.n <- as.numeric(gss$polviews)
freq(gss$polviews.n, gss$wtss, plot = FALSE)
# Cuts at 4 and 5 give three groups: below 4, exactly 4, and 5 and above.
gss$polview3 <- cut2(gss$polviews.n, c(4, 5, 8))
# Check the raw groups before relabelling them.
freq(gss$polview3, gss$wtss, plot = FALSE)
levels(gss$polview3) <- c("Liberal", "Moderate", "Conservative")
gss$polview3 <- as.ordered(gss$polview3)
# Pass the final table to printC().
printC(freq(gss$polview3, gss$wtss))

# Exercise 2: gss$income06 --> gss$income06.n4
freq(gss$income06, gss$wtss, plot = FALSE)
# Numeric copy of income06 for cut2().
gss$income06.n <- as.numeric(gss$income06)
# g = 4 requests four quantile groups.
gss$income06.n4 <- cut2(gss$income06.n, g = 4)
freq(gss$income06.n4, gss$wtss, plot = FALSE)
levels(gss$income06.n4) <- c("Low", "MedLow", "MedHigh", "High")
gss$income06.n4 <- as.ordered(gss$income06.n4)
# Build the final table once, then both display it and pass it to printC(),
# instead of evaluating the same freq() call twice.
obj <- freq(gss$income06.n4, gss$wtss)
obj
printC(obj)


# Exercise 3: gss$grass --> gss$grass.yes
freq(gss$grass, gss$wtss, plot = FALSE)
levels(gss$grass)
# Indicator: 1 when the response equals "Legal", 0 for any other response,
# NA when the response is missing. The comparison is case-sensitive, so the
# string must match one of the levels printed above exactly.
gss$grass.yes <- as.numeric(gss$grass == "Legal")
freq(gss$grass.yes, gss$wtss)

# Exercise 4: muslim.tol = mslm.col + mslm.lib + mslm.spk
# The sum uses the .n versions of the three items.
gss$muslim.tol <- with(gss, mslm.col.n + mslm.lib.n + mslm.spk.n)
freq(gss$muslim.tol, gss$wtss, plot = FALSE)
printC(freq(gss$muslim.tol, gss$wtss, plot = FALSE))
# The numeric index is replaced by its grouped version from here on.
gss$muslim.tol <- cut2(gss$muslim.tol, c(4, 6, 7))  # notice the cut points
levels(gss$muslim.tol) <- c("Low", "Mid", "High")
# label(gss$muslim.tol) <- "Tolerance twrd radical Muslim clerics"
# This labeling function works for functions in Hmisc.
# See http://www.statmethods.net/input/variablelabels.html
gss$muslim.tol <- as.ordered(gss$muslim.tol)
freq(gss$muslim.tol, gss$wtss)
printC(freq(gss$muslim.tol, gss$wtss))

